package SparkStreaming

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Streaming word count over text files that appear in an HDFS directory.
 *
 * Every 5-second batch, lines read from the monitored directory are split on
 * single spaces, each word is paired with a count of 1, the counts are summed
 * per word, and the per-batch result is printed.
 */
object ScalaHDFSWordCount {

  /**
   * Entry point: builds the streaming pipeline, starts it, and blocks until
   * the streaming context terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // The master URL and the application name are two separate settings.
    // Calling setMaster twice would overwrite "local[2]" with the app name
    // and leave the application name unset.
    val conf = new SparkConf()
      .setMaster("local[2]")
      .setAppName("ScalaHDFSWordCount")

    // In Scala the streaming entry point is StreamingContext; batches are 5 seconds long.
    val ssc = new StreamingContext(conf, Seconds(5))

    // The directory must already exist, otherwise an error is raised.
    val lines = ssc.textFileStream("hdfs://spark1:9000/wordcount_dir")

    // Tokenise each line on single spaces.
    val words = lines.flatMap(_.split(" "))
    // Pair every word with an initial count of 1.
    val pairs = words.map(word => (word, 1))
    // Sum the counts per word within each batch.
    val wordCounts = pairs.reduceByKey(_ + _)

    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
